## SURVEY DATA: Tables 3,5,6,7,8  ####

## Setup and code for clustered SEs

rm(list=ls())
library(foreign)
library(xtable)
library(ggplot2)
library(lmtest)
setwd("~/Rep File")

vcovCluster <- function(
  model,
  cluster
)
{
  require(sandwich)
  require(lmtest)
  if(nrow(model.matrix(model))!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  M <- length(unique(cluster))
  N <- length(cluster)           
  K <- model$rank   
  if(M<50){
    warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap instead).")
  }
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  return(rcse.cov)
}

## Read Data

# d is the survey data, v is the instructions for converting d into our binary outcomes

d <- read.csv("SurveyData.csv", stringsAsFactors = F)
for(i in 3:66){
  d[,i] <- as.numeric(d[,i])
}
# for clustering and FE, missing data on date, state, and year are counted as separate cluster/group
d$date <- ifelse(is.na(d$date),"miss",d$date)
d$State <- ifelse(is.na(d$State),"miss",d$State)
d$year <- ifelse(is.na(d$year),"miss",d$year)

v <- read.csv("SurveyOutcomes.csv",stringsAsFactors = F)

## Results Tables (3,5,6,7,8)

# results is a master results table from which Tables 3,5,6,7,8 are created

results <- matrix(nrow=nrow(v),ncol=10)
colnames(results) <- c("Table","Label","N","Non-Drafted","Drafted","ATE","SE","P-Value","Cluster.SE","Cluster.P")
results[,1] <- v[,3]
results[,2] <- v[,1]

# Analysis here is repeated for each outcome variable listed in v:
for(i in 1:nrow(v)){
  y <- eval(parse(text=v[i,2])) # The variable is read in via the code in v.
  dr <- d$Drafted[!is.na(y)&!is.na(d$Drafted)] # dr: drafted (binary treatment assignment)
  yob <- d$year[!is.na(y)&!is.na(d$Drafted)] # yob: year of birth (for FE)
  st <- d$State[!is.na(y)&!is.na(d$Drafted)] # st: state of birth (for FE)
  dates <- d$date[!is.na(y)&!is.na(d$Drafted)] # dates: date of birth (to cluster SEs)
  y <- y[!is.na(y)&!is.na(d$Drafted)] # observations are dropped if there is no treatment assignment data
  results[i,3] <- length(y[y!="miss"]) # records N
  results[i,4:5] <- round(c(mean(y[dr==0]),mean(y[dr==1]))*100,1) # means in treatment and control
  base.model <- lm(y~dr+as.factor(yob)+as.factor(st)) # bivariate linear regression with year and state FE
  coefs <- summary(base.model)$coefficients
  if(v$Table[i]=="Balance") coefs <- summary(lm(y~dr))$coefficients
  results[i,6:8] <- c(round(coefs[2,1:2]*100,1),round(coefs[2,4],3))
  results[i,9:10] <- coeftest(base.model,vcovCluster(base.model,cluster=dates))[2,c(2,4)] # calculate clustered SEs
  results[i,9] <- round(as.numeric(results[i,9])*100,1)
  results[i,10] <- round(as.numeric(results[i,10]),3)
}

tablenames <- unique(results[,"Table"])
tablenums <- unique(v[,"TableNum"])

# Create balance table (Table 3)
t <-results[results[,"Table"]=="Balance",c(3:5,7:8)]
rownames(t) <- results[results[,"Table"]=="Balance",2]
#print.xtable(xtable(t),file = paste0("Table 3.tex")) #uncomment to make latex table
print("TABLE 3")
t

# Create all other tables (Tables 5-8 and Appendix Tables 2-3)
for(i in 2:length(tablenames)){
  t <- results[results[,"Table"]==tablenames[i],c(3,6,9:10)]
  rownames(t) <- results[results[,"Table"]==tablenames[i],2]
  #print.xtable(xtable(t),file = paste0("Table ",tablenums[i],".tex")) #uncomment to make latex table
  print(paste0("TABLE ",tablenums[i]))
  print(t)
}


## NATIONAL VOTER FILE: Tables 9-10, A4-A5 ####

## Read in list of birthdates

dates <- read.csv("VoterInfoByBirthdate.csv",stringsAsFactors = F)
timetrend <- as.numeric(as.factor(dates$birthdate))
yearofbirth <- as.numeric(substr(dates$birthdate,1,4))

## Table 9: Voting Behavior by Draft Status

dates$votedsum <- rowSums(cbind(dates[,c("voted08","voted10","voted12","voted14")])) #sum of all votes cast by birthdate
years <- c("08","10","12","14","sum")

voting <- matrix(NA,nrow=5,ncol=5)
rownames(voting) <- c("2008","2010","2012","2014","Total")
colnames(voting) <- c("Birthdates","Mean","ATE","SE","P-Value")

#Same process for each outcome: linear regression of votes by drafted status with birth-year FE and a linear time trend
for(i in 1:length(years)){
  voting[i,1] <- nrow(dates)
  votes <- dates[,paste0("voted",years[i])]
  coefs <- summary(lm(votes~drafted+as.factor(yearofbirth)+timetrend,data=dates))$coefficients
  voting[i,2:5] <- c(mean(votes),coefs[2,1:2],coefs[2,4])
}

#print.xtable(x=xtable(voting,digits=c(0,0,1,1,1,3)),file = "Table 9.tex")
print("TABLE 9")
voting

## Table 10: Registration by Draft Status

resultstable <- matrix(nrow=6,ncol=5)
rownames(resultstable) <- c("Registered Voters","Registered Voters (Party States)","Registered Dems","Registered Reps","Scored Dems","Scored Reps")
colnames(resultstable) <- c("Birthdates","Mean","ATE","SE","P-Value")
ys <- c("N","N_reg","dem","rep","dem_score","rep_score")

#Same process for each outcome: linear regression of votes by drafted status with birth-year FE and a linear time trend
for(i in 1:6){
  y <- dates[,ys[i]]
  coefs <- summary(lm(y~drafted+as.factor(yearofbirth)+timetrend,data=dates))$coefficients
  resultstable[i,1] <- length(y)
  resultstable[i,2:5] <- c(mean(y),coefs[2,1:2],coefs[2,4])
}

#print.xtable(x=xtable(resultstable,digits=c(0,0,1,1,1,3)),file = "Table 10.tex")

print("TABLE 10")
resultstable

## Table A4: Voting by Draft Number

dates$votedsum <- rowSums(cbind(dates[,c("voted08","voted10","voted12","voted14")]))
years <- c("08","10","12","14","sum")

voting <- matrix(NA,nrow=5,ncol=5)
rownames(voting) <- c("2008","2010","2012","2014","Total")
colnames(voting) <- c("Birthdates","Mean","ATE","SE","P-Value")

for(i in 1:length(years)){
  voting[i,1] <- nrow(dates)
  votes <- dates[,paste0("voted",years[i])]
  coefs <- summary(lm(votes~number+as.factor(yearofbirth)+timetrend,data=dates))$coefficients
  voting[i,2:5] <- c(mean(votes),coefs[2,1:2],coefs[2,4])
}

#print.xtable(x=xtable(voting,digits=c(0,0,1,3,3,3)),file = "Table A4.tex")
print ("TABLE A4")
voting

## Table A5: Registration by Draft Number

resultstable <- matrix(nrow=6,ncol=5)
rownames(resultstable) <- c("Registered Voters","Registered Voters (Party States)","Registered Dems","Registered Reps","Scored Dems","Scored Reps")
colnames(resultstable) <- c("Birthdates","Mean","ATE","SE","P-Value")

ys <- c("N","N_reg","dem","rep","dem_score","rep_score")

for(i in 1:6){
  y <- dates[,ys[i]]
  coefs <- summary(lm(y~number+as.factor(yearofbirth)+timetrend,data=dates))$coefficients
  resultstable[i,1] <- length(y)
  resultstable[i,2:5] <- c(mean(y),coefs[2,1:2],coefs[2,4])
}

#print.xtable(x=xtable(resultstable,digits=c(0,0,1,3,3,3)),file = "Table A5.tex")
print("TABLE A5")
resultstable


## NATIONAL VOTER FILE ON INDIVIDUAL LEVEL (Tables A6-A7) ####

## Table A6: Voting Behavior, Individual-Level Analysis ####

voting <- read.csv("VotersByIndividualVote.csv",stringsAsFactors = F)
#Individual-level voting analysis

timetrend <- as.numeric(as.factor(voting$birthdate))
yearofbirth <- substr(voting$birthdate,0,4)

#clustering SE function
cl   <- function(dat,fm, cluster){
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

## Same process as Table 9, just on individual level:
# Draft effect on voting rate, with Year FE, State FE, and Linear time trend, SEs Clustered by birthdate

voting$votedsum <- rowSums(cbind(voting[,c("voted08","voted10","voted12","voted14")]))/4
years <- c("08","10","12","14","sum")

votingtable <- matrix(NA,nrow=5,ncol=4)
rownames(votingtable) <- c("2008","2010","2012","2014","Total")
colnames(votingtable) <- c("N","Drafted","SE","P-Value")

for(i in 1:length(years)){
  votingtable[i,1] <- nrow(voting)
  votes <- voting[,paste0("voted",years[i])]
  coefs <- cl(dat=voting,fm = lm(votes~voting$drafted+as.factor(voting$state)+as.factor(yearofbirth)+timetrend),cluster=voting$birthdate)
  votingtable[i,2:4] <- c(coefs[2,1:2]*100,coefs[2,4])
}
rm(voting)

#print.xtable(x=xtable(votingtable,digits=c(0,0,3,3,3)),file = "Table A6.tex")
print("TABLE A6")
votingtable


## Table A7: Party Registration, Individual-Level Analysis

reg <- read.csv("VotersByIndividualReg.csv",stringsAsFactors = F)

states <- unique(reg$state)
reg <- reg[reg$state %in% states[(states%in% unique(reg$state[reg$party=="DEM"]))],]
timetrend <- as.numeric(as.factor(reg$birthdate))
yearofbirth <- as.numeric(substr(reg$birthdate,1,4))

## Same process as Table 10, just on individual level:
# Draft effect on party registration, with Year FE, State FE, and Linear time trend, SEs Clustered by birthdate

regtable <- matrix(nrow=4,ncol=4)
rownames(regtable) <- c("Registered Dem","Registered Rep","Scored Dem","Scored Rep")
colnames(regtable) <- c("N","Drafted","SE","P-Value")

ys <- c("DEM","REP","DEM","REP")

for(i in 1:2){
  y <- reg$party==ys[i]
  coefs <- cl(dat=reg,fm = lm(y~reg$drafted+as.factor(reg$state)+as.factor(yearofbirth)+timetrend),cluster=reg$birthdate)
  regtable[i,1] <- length(y)
  regtable[i,2:4] <- c((coefs[2,1:2])*100,coefs[2,4])
}
rm(reg)

## Party Score

score <- read.csv("VotersByIndividualScore.csv",stringsAsFactors = F)

states <- unique(score$state)
timetrend <- as.numeric(as.factor(score$birthdate))
yearofbirth <- as.numeric(substr(score$birthdate,1,4))

for(i in 3:4){
  y <- score$party==ys[i]
  coefs <- cl(dat=score,fm = lm(y~score$drafted+as.factor(score$state)+as.factor(yearofbirth)+timetrend),cluster=score$birthdate)
  regtable[i,1] <- length(y)
  regtable[i,2:4] <- c((coefs[2,1:2])*100,coefs[2,4])
}
rm(score)

#print.xtable(x=xtable(regtable,digits=c(0,1,3,3,3)),file = "Table A7.tex")

print("TABLE A7")
regtable

